##############################################################
##############################################################
### Replication Materials for
### Stefan Müller and Liam Kneafsey:
### Evidence for the Irrelevance of Irrelevant Events
### Political Science Research and Methods
###
### Please get in touch with the authors if you have any questions: 
### stefan.mueller@ucd.ie
### Note: 0000_readme.pdf contains 
### instructions and details about each script and dataset
##############################################################
##############################################################

# load packages
library(dplyr)   # CRAN v1.0.5
library(tidyr)   # CRAN v1.1.3
library(MuMIn)   # CRAN v1.43.17
library(stringr) # CRAN v1.4.0
library(cowplot) # CRAN v1.1.1
library(car)     # CRAN v3.0-10
library(ggplot2) # CRAN v3.3.3

here::here()
## alternatively, set working directory manually
# setwd("...")

source("function_theme_base.R")

## parts of this code are adjusted from
## https://dcosme.github.io/2019/02/26/specification-curve-example/

## load dataset with elections
dat_general_raw <- readRDS("data_elections_general_full.rds") 


## only keep: incumbents whose county either lost or won a game
## = all "treated" incumbents
## the coefficienf for winner_loser_before_num will show 
## whether an incumbent increases her vote share when the county team
## won shortly before the election (winner_loser_before_num = 1)

dat_general_incumbents_treated <- dat_general_raw %>% 
    filter(incumbent == "Incumbent (Candidate elected in t-1)") %>% 
    filter(winner_loser_before != "Untreated") %>% 
    mutate(decade = factor(paste0(substr(election_id, 1, 3), "0"))) %>% 
    mutate(winner_loser_before = as.character(winner_loser_before)) %>% 
    mutate(winner_loser_before_num = ifelse(winner_loser_before == "Win", 1, 0))


#                # create a dummy for strongholds
dat_general_incumbents_treated <- dat_general_incumbents_treated %>% 
    mutate(stronghold_dummy = ifelse(str_detect(stronghold_win_loss, "Stronghold"), 
                                     "Stronghold", "Non-stronghold"))


## set na.action for dredge
options(na.action = "na.fail")


## select only the variables included in the regression models 
## and omit missing values to have the same N across all models

dat_general_incumbents_treated_no_na <- dat_general_incumbents_treated %>% 
    ungroup() %>% 
    select(diff_share, county_gaa, 
           winner_loser_before_num, turnout, 
           election_id, sport, margin,
           party_recoded, stronghold_dummy) %>% 
    na.omit() 


summary(dat_general_incumbents_treated_no_na$winner_loser_before_num)

## number of complete observations in this subset
nrow(dat_general_incumbents_treated_no_na)

## run full model
full_model_general <- lm(diff_share ~ 
                             winner_loser_before_num +
                             county_gaa +
                             turnout +
                             margin +
                             election_id +
                             sport +
                             party_recoded +
                             stronghold_dummy,
                         data = dat_general_incumbents_treated_no_na)


## run all possible nested models but only keep thos models that include
## the treatment variable (winner_loser_before)

all_models_general <-  dredge(full_model_general) 

nrow(all_models_general)

## filter only models that include the main independent variable of interest (winner/loser)
all_models_general <- filter(all_models_general, !is.na(winner_loser_before_num))

nrow(all_models_general)

## get coefficients and standard errors for all estimates in each model
all_models_general_coefs <- coefTable(all_models_general)

length(all_models_general_coefs)

## transform list of coefficients and standard errors to a data frame
df_all_models_general_coefs <- do.call(rbind.data.frame, all_models_general_coefs)

## get coefficient name from rownames
df_all_models_general_coefs$coefficient <- rownames(df_all_models_general_coefs)


## filter only those coefficients relating to winner_loser_before_num
## because these values are needed to extract the standard errors of the coefficients
df_coefs_se_winner_loser_before <- filter(df_all_models_general_coefs, 
                                          str_detect(coefficient, "winner_loser")) 

#                # create variable for merging below
df_coefs_se_winner_loser_before <- df_coefs_se_winner_loser_before %>% 
    mutate(winner_loser_before_num = Estimate) 

## merge standard errors for the independent variable of interest
## by using the point estimates of winner_loser_before_num
## as the merging variable (not elegant, but not possible to do it differently)

## check that estimate colums are unique 
unique_valid_models <- nrow(filter(all_models_general, !is.na(winner_loser_before_num)))
unique_valid_models

unique_coefs <- length(unique(all_models_general$winner_loser_before_num))
unique_coefs


unique_coefs_se <- length(unique(df_coefs_se_winner_loser_before$winner_loser_before_num))
unique_coefs_se

## this code works only if the number of unique 
## coefficients is the same in both dataframes!
## Be very careful if you adjust this code for your own work.

## stop the code if the numbers differ
stopifnot(unique_coefs_se == unique_coefs)

all_models_winner_loser_before <- left_join(all_models_general,
                                            df_coefs_se_winner_loser_before,
                                            by = c("winner_loser_before_num"))


nrow(all_models_winner_loser_before)
nrow(all_models_general)

## get variables included in the model
variable.names <-  names(dat_general_incumbents_treated_no_na)[-1]


## prepare data for plot
plot_data_general <-  all_models_winner_loser_before %>%
    filter(!is.na(Estimate)) %>% ## remove models without coefficient for winner_loser_before_num
    arrange(Estimate) %>% ## arrange estimates in ascending order
    mutate(specification = row_number()) 

nrow(plot_data_general) ## number of models


## check how many coefficients are positive/negative and how many are
## statistically significant

plot_data_general_check <- plot_data_general %>% 
    mutate(significant_95 = (Estimate - 1.96 * `Std. Error`) > 0 |
               (Estimate + 1.96 * `Std. Error`) < 0) %>% 
    mutate(direction = ifelse(Estimate > 0, "Positive",
                              ifelse(Estimate < 0, "Negative", "0")))


table(plot_data_general_check$direction,
      plot_data_general_check$significant_95)


plot_top_general <- plot_data_general %>%
    ggplot(aes(x = specification, 
               y = Estimate)) + 
    geom_linerange(aes(ymin = Estimate - 1.96 * `Std. Error`,
                       ymax = Estimate + 1.96 * `Std. Error`),
                   colour = "grey50") +
    geom_point(size = 1) +
    geom_hline(yintercept = 0, linetype = "dashed", color = "red") + ## dashed line for null model AIC
    labs(x = "", y = "Coefficient estimate for Winner")
plot_top_general

## note: warning will appear because not all models contain all variables 
## (therefore, the attributes are not identical and will be dropped)

plot_data_general_long <-  plot_data_general %>%
    gather(variable, value, eval(variable.names)) %>% 
    mutate(value = ifelse(!is.na(value), "|", "")) ## plot a line if included in the model


## remove winner_loser_before_num because this variable is 
## included in all models and this coefficient is displayed in the graph

plot_data_general_long <- plot_data_general_long %>% 
    filter(variable != "winner_loser_before_num")


table(plot_data_general_long$variable)

recode_vars <- c("'county_gaa'='County team';
                 'difference'='Days after match';
                 'election_id'='Election';
                 'margin'='Margin of result';
                 'party_recoded'='Party';
                 'sport'='Sport';
                 'stronghold_dummy'='Stronghold';
                 'turnout'='Turnout'")



plot_data_general_long <- plot_data_general_long %>% 
    mutate(variable = car::recode(variable, recode_vars))


plot_bottom_general <-   ggplot(plot_data_general_long,
                                aes(specification, variable)) +
    geom_text(aes(label = value), alpha = 1) +
    labs(x = "Specification number", y = "Variables")

plot_bottom_general

## combine both plots
plot_grid(plot_top_general, plot_bottom_general, ncol = 1, align = "v",
          rel_heights = c(0.6, 0.4))
ggsave("fig_a15.pdf",
       width = 9, height = 5.5)



## script executed successfully on
date()

sessionInfo()
